% -------------------------------------------------------------------------
% Script banner and opening housekeeping.
% Prints the description header, starts the wall-clock timer (read back by
% -toc- at the end of the script), and wipes the workspace.
% -------------------------------------------------------------------------
rule_line = '\n==============================================================';
header_lines = { ...
    rule_line, ...
    '\nDESCRIPTION: VALUE_PAGERANKS.m computes PageRank on a strongly connected set of employers.', ...
    '\n', ...
    '\nNOTES:       - MATLAB version R2015b or later is required for', ...
    '\n               directed graph commands.', ...
    '\n', ...
    rule_line, ...
    '\n', ...
    rule_line, ...
    '\n OPENING HOUSEKEEPING', ...
    rule_line, ...
    '\n  --> start timer\n'};
for k = 1:numel(header_lines)
    fprintf(header_lines{k}); % each entry is a format string, so '\n' is expanded by fprintf
end
tic; % -tic- without an output keeps its timer internally, so -clear- below does not reset it

fprintf('\n  --> clear memory\n');
clear % wipes the workspace, including the helper variables created above


% -------------------------------------------------------------------------
% SET DIRECTORIES AND PARAMETERS
% Resolves the project directories, reads the three PageRank parameters
% (weight flag, damping factor, file-name extension) from
% parameters_pagerank.csv, builds the input/output/stop-file paths, and
% starts the diary (log) file.
%
% Variables left in the workspace for later sections:
%   try_path_4, iter_max, iter_tol, params_dir, pagerank_weight,
%   pagerank_damping, input_dir, output_dir, stop_dir
% -------------------------------------------------------------------------
fprintf('\n==============================================================')
fprintf('\n SET DIRECTORIES AND PARAMETERS')
fprintf('\n==============================================================')
fprintf('\n  --> set paths and macros not passed from Stata\n')
try_path_4 = 'E:/project_CBFW/int_files'; % same as $files in 0.MASTER.do
if exist(try_path_4, 'dir')
    DIR_INPUT = try_path_4;
    DIR_OUTPUT = try_path_4;
    DIR_LOG = 'E:/project_CBFW/log_files'; % same as $logs in 0.MASTER.do
else
    fprintf('\nUSER ERROR: Directory not found.\n')
    exit(1) % non-zero status so the calling process can detect the failure
end
iter_max = 1e5; % number of iterations of PageRank before aborting
iter_tol = 1e-7; % tolerance for PageRank convergence

fprintf('\n  --> read parameters\n')
params_dir = [DIR_INPUT '/parameters_pagerank.csv']; % Path to file containing PageRank parameters.
file_params = fopen(params_dir, 'r');
if file_params == -1
    % Without this guard -fscanf- fails with an opaque "invalid file identifier" error.
    fprintf('\nUSER ERROR: Could not open parameter file: %s\n', params_dir)
    exit(1)
end
% The file is read as whitespace-separated numeric triples, one triple per row.
data_params = fscanf(file_params, '%f %f %f', [3 inf])';
fclose(file_params);
clear file_params
if isempty(data_params)
    % An empty file would otherwise silently yield empty parameters and
    % malformed file names further down.
    fprintf('\nUSER ERROR: No parameters could be read from: %s\n', params_dir)
    exit(1)
end
if size(data_params, 1) > 1
    % Every parameter must be a scalar; only the first row is used.
    fprintf('\nWARNING: parameter file has %d rows; only the first row is used.\n', size(data_params, 1))
end
pagerank_weight = data_params(1, 1); % Use edge weights? 0 = no; 1 = yes.
fprintf('\n')
disp(['pagerank_weight = ' num2str(pagerank_weight)])
% Damping factor (between 0.00 and 1.00), which is the probability that a
% random surfer clicks on a link on the current page, instead of continuing
% on another random page. Standard value in computer science = 0.85, but
% Sorkin (2018, QJE) sets = 1.00.
pagerank_damping = data_params(1, 2);
fprintf('\n')
disp(['pagerank_damping = ' num2str(pagerank_damping)])
ext = data_params(1, 3); % Numeric file name extension (converted to text with num2str below).
fprintf('\n')
disp(['ext = ' num2str(ext)])
clear data_params

fprintf('\n  --> set read and write paths\n')
input_dir = [DIR_INPUT '/pagerank_input_', num2str(ext), '.csv']; % Path to file containing inputs to PageRank estimation.
output_dir = [DIR_OUTPUT '/pagerank_output_', num2str(ext), '.txt']; % Path to file where MATLAB output is exported to.
stop_dir = [DIR_OUTPUT '/stoppr_' num2str(ext) '.txt']; % Path to file to tell Stata to continue running.
clear DIR_INPUT DIR_OUTPUT

fprintf('\n  --> start diary (log) file\n')
FILE_LOG = [DIR_LOG, '/log_pagerank_matlab_', num2str(ext), '.log'];
diary(FILE_LOG) % function form: no -eval-, and safe for paths containing quotes or spaces
clear DIR_LOG FILE_LOG ext


% -------------------------------------------------------------------------
% COMPUTE PAGERANKS
% Reads the edge list (parent employer ID, child employer ID and, when
% pagerank_weight is set, an edge weight), maps employer IDs to consecutive
% node numbers, builds a directed graph, and computes PageRank.
%
% Uses from earlier sections: input_dir, pagerank_weight, pagerank_damping,
%                             iter_max, iter_tol
% Leaves in the workspace:    id_unique (original employer IDs, sorted) and
%                             pagerank (one score per entry of id_unique)
%
% NOTE(review): the script header says R2015b is sufficient, but -centrality-
% and multigraph support appear to have been added in later releases --
% confirm the minimum MATLAB version.
% -------------------------------------------------------------------------
fprintf('\n==============================================================')
fprintf('\n COMPUTE PAGERANKS')
fprintf('\n==============================================================')
fprintf('\n  --> read input data\n')
input_file = fopen(input_dir, 'r');
if input_file == -1
    % Without this guard -fscanf- fails with an opaque "invalid file identifier" error.
    fprintf('\nUSER ERROR: Could not open input file: %s\n', input_dir)
    exit(1)
end
input_format = '%f %f'; % always load at least 2 variables: parent employer ID, child employer ID
input_n = 2;
if pagerank_weight
    input_format = [input_format, ' %f']; % third column: edge weight
    input_n = input_n + 1;
end
fprintf('\n')
disp(['number of variables in input data = ' int2str(input_n) ' (format = ' input_format ')']);
input_data = fscanf(input_file, input_format, [input_n inf])'; % one row per edge
fclose(input_file);
if isempty(input_data)
    % Indexing columns of an empty matrix below would raise an unhelpful error.
    fprintf('\nUSER ERROR: No edges could be read from input file: %s\n', input_dir)
    exit(1)
end
parent = input_data(:, 1); % Input data column 1: parent employer ID.
child = input_data(:, 2); % Input data column 2: child employer ID.
if pagerank_weight
    weight = input_data(:, 3); % Input data column 3: importance weight of each edge (row).
end
clear input_dir input_file input_format input_n input_data

fprintf('\n  --> create unique parent IDs and child IDs -- should already be done in Stata!\n')
n_edges = numel(parent);
% id_unique holds the sorted unique employer IDs; index maps every entry of
% the stacked [parent; child] vector to its position in id_unique.
[id_unique, ~, index] = unique([parent; child]);
parent = index(1:n_edges); % Parent IDs renumbered so that employers are counted from 1.
child = index(n_edges + 1:end); % Child IDs renumbered so that employers are counted from 1.
clear index n_edges

fprintf('\n  --> compute PageRank\n')
if pagerank_weight
    G = digraph(parent, child, weight); % directed graph from (1) source node, (2) target node, (3) edge weight
    clear weight
    % -centrality- does not use G.Edges.Weight unless it is passed through
    % 'Importance'. The weights are taken from G.Edges (not from the input
    % vector) because -digraph- may reorder the edges.
    pagerank = centrality(G, 'pagerank', 'Importance', G.Edges.Weight, 'FollowProbability', pagerank_damping, 'MaxIterations', iter_max, 'Tolerance', iter_tol);
else
    G = digraph(parent, child); % directed graph from (1) source node and (2) target node
    pagerank = centrality(G, 'pagerank', 'FollowProbability', pagerank_damping, 'MaxIterations', iter_max, 'Tolerance', iter_tol);
end
clear pagerank_weight parent child
clear G pagerank_damping iter_max iter_tol

whos

% Write one header row and one row per employer (original employer ID and
% its PageRank) to the tab-delimited file at output_dir, then drop all
% variables that are no longer needed.
fprintf('\n  --> write estimation results to tab-delimited output file\n')
output_file = fopen(output_dir, 'w');
if output_file == -1
    % Without this guard the first -fprintf- to the file fails with an
    % opaque "invalid file identifier" error.
    fprintf('\nUSER ERROR: Could not open output file for writing: %s\n', output_dir)
    exit(1)
end
out_header_format = '%14s\t %20s\n';
out_header = {'empid', 'pagerank'};
out_data_format = '%14.0f\t %20.18f\n';
% 2-by-N matrix: fprintf consumes it column by column, so every output row
% is one (employer ID, PageRank) pair.
out_data = [id_unique(:).'; pagerank(:).'];
fprintf(output_file, out_header_format, out_header{:});
fprintf(output_file, out_data_format, out_data);
fclose(output_file);
clear out_header_format out_header out_data_format out_data output_file output_dir id_unique pagerank


% -------------------------------------------------------------------------
% CLOSING HOUSEKEEPING
% Creates the (empty) stop file at stop_dir, deletes the parameter file,
% clears the workspace, reports elapsed time, closes the diary, and exits
% MATLAB.
% -------------------------------------------------------------------------
fprintf('\n==============================================================')
fprintf('\n CLOSING HOUSEKEEPING')
fprintf('\n==============================================================')

% Create the empty stop file; only its existence matters.
fid = fopen(stop_dir, 'w+');
if fid == -1
    % fclose(-1) would raise an error and abort the script before cleanup
    % and exit, so report the problem and carry on instead.
    fprintf('\nWARNING: Could not create stop file: %s\n', stop_dir)
else
    fclose(fid);
end

fprintf('\n  --> summarize objects stored in memory\n')
whos

fprintf('\n  --> clear parameters input\n')
fclose('all'); % make sure no handle still points at the parameter file
delete(params_dir) % function form: works for paths containing spaces, no -eval- needed
clear params_dir

fprintf('\n  --> clear memory\n')
clear

fprintf('\n  --> end timer\n')
toc

fprintf('\n  --> close diary (log) file\n')
fprintf('\n\n\n\n\n\n\n\n\n\n\n\n')
diary off % "diary close" would redirect the diary to a new file named "close" instead of stopping it

fprintf('\n  --> exit\n')
exit